{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 采用 xgboost 模型完成商品分类"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 调用模型进行测试"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "首先导入必要的工具包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from xgboost import XGBClassifier\n",
    "import xgboost as xgb\n",
    "\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "\n",
    "from sklearn.metrics import log_loss\n",
    "\n",
    "from matplotlib import pyplot\n",
    "import seaborn as sns\n",
    "import math\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###   数据读取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# path to where the data lies\n",
    "path = './data/' \n",
    "reain_file_name = 'RentListingInquries_FE_train.csv'\n",
    "test_file_name = 'RentListingInquries_FE_test.csv'\n",
    "\n",
    "data_train = pd.read_csv(path + reain_file_name)\n",
    "data_test = pd.read_csv(path + test_file_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 从原始数据中分离输入特征x和输出y\n",
    "X = data_train.drop('interest_level', axis = 1)\n",
    "y = data_train['interest_level']\n",
    "\n",
    "predict_x = data_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 将数据分割训练数据与测试数据\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# 随机采样20%的数据构建测试样本，其余作为训练样本\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,random_state = 0)\n",
    "#print('X_train.shape: %s %s'%X_train.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "根据调整后的参数得到训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=0.7,\n",
       "       colsample_bytree=0.8, gamma=0, learning_rate=0.1, max_delta_step=0,\n",
       "       max_depth=5, min_child_weight=1, missing=None, n_estimators=187,\n",
       "       n_jobs=1, nthread=None, objective='multi:softprob', random_state=0,\n",
       "       reg_alpha=2, reg_lambda=2, scale_pos_weight=1, seed=3, silent=True,\n",
       "       subsample=0.8)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#xgboost.XGBClassifier(max_depth=3, learning_rate=0.1,\n",
    "#n_estimators=100, silent=True, objective='binary:logistic', nthread=-1,\n",
    "#gamma=0, min_child_weight=1, max_delta_step=0, subsample=1,\n",
    "#colsample_bytree=1, colsample_bylevel=1, reg_alpha=0, reg_lambda=1,\n",
    "#scale_pos_weight=1, base_score=0.5, random_state=0, seed=None,\n",
    "#missing=None , **kwargs )\n",
    "\n",
    "bst = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=187,  #第二轮参数调整得到的n_estimators最优值\n",
    "        max_depth=5,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.8,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        reg_alpha = 2, \n",
    "        reg_lambda = 2,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "bst.fit(X_train , y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/anaconda3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([2, 1, 2, ..., 2, 0, 2])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predict_y = bst.predict(predict_x)\n",
    "predict_y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(predict_y).to_csv('predict_y.csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
